rm(list = ls())
pacman::p_load(caret, tidyverse, data.table, viridis, haven, Rfast, glmnet, fixest, egg, modelsummary, kableExtra, car, purrr) 

dir = dirname(rstudioapi::getSourceEditorContext()$path)
setwd(dir)

options(modelsummary_factory_default = 'kableExtra')
set.seed(11215)

################################################################################
### DESCRIPTIVES ###############################################################
################################################################################

### Figure 1 (Fact-check characteristics) ######################################

topics <- fread("Data/topics.csv")

### Figure 1a: Topics fact-checked ###

### Bar plot for categories ###

d <- topics %>% summarise(across(c(politics, economic, race_and_nationalism, health_covid, health_noncovid, crime, society, fun_fact), mean))
d <- data.frame(Proportion=t(d)) 
d$topic <- c("Politics", "Economy", "Race/Xenophobia", "COVID-19", "Other Health", "Crime", "Society", "Misc/Fun facts")

p <- ggplot(d, aes(y=Proportion, x=topic)) + theme_bw() +
  geom_col(fill="darkorchid4") + xlab("") + 
  scale_fill_viridis(discrete=T) +
  theme(axis.text.x=element_text(angle=90,vjust = 0.5,hjust=1))

ggsave("Figures/Figure 1a.pdf",p,width=5.5,height=4.5)

### Figure 1b: Accuracy of facts checked ###

acc_order <- c("T", "F", "M", "U")

d <- topics %>% 
  group_by(accuracy) %>% 
  summarise(n = n()) %>%
  mutate(prop = 100*(n/sum(n))) %>% 
  mutate(prop_txt = paste0(round(prop,0),"%")) %>% 
  slice(match(acc_order, accuracy))

d$Accuracy <- c("True", "False", "Misleading", "Uncertain")
d$Accuracy <- factor(d$Accuracy, levels = d$Accuracy)

d <- d %>% arrange(desc(Accuracy)) %>% mutate(ypos = cumsum(prop) - 0.5*prop)

p <- ggplot(d, aes(y=prop, x="", fill=Accuracy)) +
  geom_bar(stat="identity", width=1, color="white") +
  xlab("Accuracy") +
  coord_polar("y", start=120) +
  scale_fill_viridis(discrete=T) +
  geom_label(aes(y = ypos, label = prop_txt), fill = "white", nudge_x=c(0.25,0.25,0,0.25), size=5) +
  theme_void()

ggsave("Figures/Figure 1b.pdf",p,width=5.5,height=4)

### Figure C2 (Sample characteristics) #########################################

d <- read_dta("Data/survey_data.dta")

sum_d <- d %>% dplyr::filter(in_endline==1) %>%
  dplyr::summarise(Female=mean(na.omit(dem_q2_1)),Urban=mean(na.omit(dem_q5_1+dem_q5_2)),
                   `18-24`=mean(na.omit(val_q2_1)), `25-34`=mean(na.omit(val_q2_2)), `35-44`=mean(na.omit(val_q2_3)), `45-54`=mean(na.omit(val_q2_4)), `55+`=mean(na.omit(val_q2_5)),
                   `Primary`=mean(na.omit(dem_q3_1)), `Secondary`=mean(na.omit(dem_q3_2)), `University`=mean(na.omit(dem_q3_3)),
                   Zulu=mean(na.omit(end_personal_q1_1)), Xhosa=mean(na.omit(end_personal_q1_2)), Afrikaans=mean(na.omit(end_personal_q1_3)), English=mean(na.omit(end_personal_q1_4)), Sotho=mean(na.omit(end_personal_q1_5)), Ndebele=mean(na.omit(end_personal_q1_6)),
                   `Eastern\nCape`=mean(na.omit(dem_q4_1)), `Free\nState`=mean(na.omit(dem_q4_2)), Gauteng=mean(na.omit(dem_q4_3)), `KwaZulu-\nNatal`=mean(na.omit(dem_q4_4)), Limpopo=mean(na.omit(dem_q4_5)), 
                   Mpumalanga=mean(na.omit(dem_q4_6)), `Northern\nCape`=mean(na.omit(dem_q4_7)), `North\nWest`=mean(na.omit(dem_q4_8)), `Western\nCape`=mean(na.omit(dem_q4_9))) %>% 
  tidyr::pivot_longer(everything())
sum_d$sample <- "Endline"

ab7 <- fread("Data/ab7.csv")

make.ab.sum <- function(sample){
  if(sample=="AB7 All"){ab <- ab7}
  if(sample=="AB7 Social Media"){ab <- ab7 %>% filter(Uses_SM==1)}

  sum <- ab %>%
    summarise(across(c(Female, Urban), function(x) weighted.mean(x,withinwt)),
              `18-24`=weighted.mean(as.numeric(Age) %in% 18:24, withinwt),`25-34`=weighted.mean(as.numeric(Age) %in% 25:34, withinwt),`35-44`=weighted.mean(as.numeric(Age) %in% 35:44, withinwt),`45-54`=weighted.mean(as.numeric(Age) %in% 45:54, withinwt),`55+`=weighted.mean(as.numeric(Age) %in% 55:64, withinwt),
              `Primary`=weighted.mean(na.omit(Education_Primary),withinwt),`Secondary`=weighted.mean(na.omit(Education_Secondary),withinwt),`University`=weighted.mean(na.omit(Education_University),withinwt),
              Zulu=weighted.mean(na.omit(Ethnic_Zulu==1),withinwt), Xhosa=weighted.mean(na.omit(Ethnic_Xhosa==1),withinwt), Afrikaans=weighted.mean(na.omit(Ethnic_Afrikaans==1),withinwt), English=weighted.mean(na.omit(Ethnic_English==1),withinwt), Sotho=weighted.mean(na.omit(Ethnic_Sotho==1),withinwt), Ndebele=weighted.mean(na.omit(Ethnic_Ndebele==1),withinwt),
              `Eastern\nCape`=weighted.mean(na.omit(Province=="Eastern Cape"),withinwt), `Free\nState`=weighted.mean(na.omit(Province=="Free State"),withinwt), Gauteng=weighted.mean(na.omit(Province=="Gauteng"),withinwt), `KwaZulu-\nNatal`=weighted.mean(na.omit(Province=="KwaZulu-Natal"),withinwt), Limpopo=weighted.mean(na.omit(Province=="Limpopo"),withinwt),Mpumalanga=weighted.mean(na.omit(Province=="Mpumalanga"),withinwt), `Northern\nCape`=weighted.mean(na.omit(Province=="Northern Cape"),withinwt), `North\nWest`=weighted.mean(na.omit(Province=="North West"),withinwt), `Western\nCape`=weighted.mean(na.omit(Province=="Western Cape"),withinwt)) %>%  
    tidyr::pivot_longer(everything())

  sum$sample <- sample
  return(sum)
}

sum_ab_all <- make.ab.sum('AB7 All')
sum_ab_sm  <- make.ab.sum('AB7 Social Media')

sum <- dplyr::bind_rows(sum_d,sum_ab_all,sum_ab_sm)
sum$sample <- factor(sum$sample, levels=c("AB7 All","AB7 Social Media","Endline"))

sum_dem <- sum[sum$name%in%c("18-24","25-34","35-44","45-54","55+","Female","Urban"),]
sum_ed <- sum[sum$name%in%c("Primary","Secondary","University"),]
sum_province <- sum[sum$name%in%c("Eastern\nCape","Free\nState","Gauteng","KwaZulu-\nNatal","Limpopo","Mpumalanga","Northern\nCape","North\nWest","Western\nCape"),]
sum_ethnic <- sum[sum$name%in%c("Zulu","Xhosa","Afrikaans","English","Sotho","Ndebele"),]

make.comparison.plot <- function(df){
   p <- ggplot(data=df, aes(x=name,y=value,fill=factor(sample),group=factor(sample)))+
     theme_bw()+xlab(NULL)+geom_col(position="dodge2",width=0.5)+ylab(NULL)+scale_fill_viridis(discrete=T)+
     theme(legend.title = element_blank())
   return(p)
}
 
ggsave("Figures/Figure C2a.pdf",make.comparison.plot(sum_dem),width=5.5,height=3)
ggsave("Figures/Figure C2b.pdf",make.comparison.plot(sum_ed),width=5.5,height=3)
ggsave("Figures/Figure C2c.pdf",make.comparison.plot(sum_ethnic),width=5.5,height=3)
ggsave("Figures/Figure C2d.pdf",make.comparison.plot(sum_province),width=8,height=3)

### Figure C3 (Quiz completion) ################################################

quiz <- fread("Data/quiz_completion.csv")
quiz <- left_join(quiz, d %>% mutate(t0=as_factor(t0),t5=as_factor(t5)) %>% select(id,t0,t5), by="id") # merge in treatment assignment information

quiz <- quiz %>% 
  left_join(d %>% mutate(t0=as_factor(t0),t5=as_factor(t5)) %>% group_by(t0) %>% summarise(n_t0=n()), by="t0") %>% 
  left_join(d %>% mutate(t0=as_factor(t0),t5=as_factor(t5)) %>% group_by(t5) %>% summarise(n_t5=n()), by="t5")

quiz_sum <- quiz %>% group_by(index) %>% summarize(`Share completed quiz`=n()/dim(d)[1], `Share receiving high\nincentives in quiz`=mean(high, na.rm=T)) %>% pivot_longer(2:3)
quiz_sum$name <- factor(quiz_sum$name, levels=unique(quiz_sum$name))

quiz_sum0 <- quiz %>% group_by(index,t0) %>% # pooled treatment vector
  summarize(`Share completed quiz`=n()/first(n_t0), `Share receiving high\nincentives in quiz`=mean(high, na.rm=T)) 
quiz_sum0 <- quiz_sum0 %>% pivot_longer(3:4) %>% filter(!is.na(t0))
quiz_sum0$name <- factor(quiz_sum0$name, levels=unique(quiz_sum0$name))

quiz_sum5 <- quiz %>% group_by(index,t5) %>% # disaggregated treatment vector
  summarize(`Share completed quiz`=n()/first(n_t5), `Share receiving high\nincentives in quiz`=mean(high, na.rm=T)) 
quiz_sum5 <- quiz_sum5 %>% pivot_longer(3:4) %>% filter(!is.na(t5))
quiz_sum5$name <- factor(quiz_sum5$name, levels=unique(quiz_sum5$name))

p <- ggplot(data=quiz_sum, aes(x=index, y=value, color=name, group=name)) + theme_bw() +
  geom_line()+geom_point()+ylim(c(0,1))+scale_color_viridis(discrete=T)+
  xlab("Quiz index")+ylab(NULL)+theme(strip.background=element_rect(fill="white"))+
  theme(legend.title=element_blank(), legend.position="top")+guides(fill = guide_legend(reverse = TRUE))+scale_x_continuous(breaks=seq(1,4,1))
ggsave(file="Figures/Figure C3a.pdf", plot = p, width=4, height=3.5)

p0 <- ggplot(data=quiz_sum0, aes(x=index, y=value, color=t0, group=t0)) + theme_bw() +
  geom_line()+geom_point()+ylim(c(0,1))+scale_color_viridis(discrete=T)+
  xlab("Quiz index")+ylab(NULL)+facet_grid(cols=vars(name))+theme(strip.background=element_rect(fill="white"))+
  theme(legend.title=element_blank(), legend.position="top")+guides(fill = guide_legend(reverse = TRUE))+scale_x_continuous(breaks=seq(1,4,1))
ggsave(file="Figures/Figure C3b.pdf", plot = p0, width=4, height=3.5)

p5 <- ggplot(data=quiz_sum5, aes(x=index, y=value, color=t5, group=t5)) + theme_bw() +
  geom_line()+geom_point()+ylim(c(0,1))+scale_color_viridis(discrete=T)+
  xlab("Quiz index")+ylab(NULL)+facet_grid(cols=vars(name))+theme(strip.background=element_rect(fill="white"))+
  theme(legend.title=element_blank(), legend.position="top")+guides(fill = guide_legend(reverse = TRUE))+scale_x_continuous(breaks=seq(1,4,1))
ggsave(file="Figures/Figure C3c.pdf", plot = p5, width=4, height=3.5)


### Table 2 (Outcome variables) ################################################

map <- fread('Data/variable_map.csv')

gen.summ <- function(index){
  print(index)
  
  dat <- d %>% filter(in_endline==1)
  
  vars <- unique(na.omit(map$endline_var[map$index==index]))
  labs <- unique(na.omit(map$lab[map$index==index]))
  lab_index <- map$lab[map$endline_var==paste0("end_icw_",index)]
  
  to_flip <- vars[grepl("rev|true", labs)]
  if(length(to_flip)>1){
    mins <- apply(dat %>% select(all_of(to_flip)), 2, function(x) min(na.omit(x)))
    maxs <- apply(dat %>% select(all_of(to_flip)), 2, function(x) max(na.omit(x)))
    comp <- bind_rows(mins, maxs) %>% t() %>% as.data.frame() %>% mutate(v=to_flip)
    
    plus1 <- comp$v[comp$V1==-1 & comp$V2==0]
    plus5 <- comp$v[comp$V1==-4 & comp$V2==-1]
    plus6 <- comp$v[comp$V1==-5 & comp$V2==-1]
    plus7 <- comp$v[comp$V1==-7 & comp$V2==0]
    
    if(length(plus1)>0){ dat <- dat %>% mutate(across(plus1, function(x) x+1)) }
    if(length(plus5)>0){ dat <- dat %>% mutate(across(plus5, function(x) x+5)) }
    if(length(plus6)>0){ dat <- dat %>% mutate(across(plus6, function(x) x+6)) }
    if(length(plus7)>0){ dat <- dat %>% mutate(across(plus7, function(x) x+7)) }
  }
  
  inner.function <- function(x){
    vec <- as.numeric(unlist(dat %>% select(x)))
    return(data.frame(x, Mean=mean(na.omit(vec)), SD=sd(na.omit(vec)), Min=min(na.omit(vec)), Max=max(na.omit(vec))))
  }
  
  out <- bind_rows(lapply(vars, inner.function)) %>% mutate(Index=lab_index) %>% bind_cols(Label=labs) %>% relocate(Index, Label) %>% select(-x) 
  if(length(out$Index)>1){  out$Index[2:length(out$Index)] <- " " }
  return(out)
}

families <- c("pod_takeup","treat_knowledge","future_takeup","discernment","conspiracy","verify_know",
              "attention","trust_sm","consume_sm","verifies","share","covid","govt")

tab <- bind_rows(lapply(families, function(x) gen.summ(x)))

out_tab <- datasummary_df(tab, col.names=colnames(tab), format="latex", escape=F, align="llcccc", title="Outcome variables\\label{Table_2}") %>% 
  kable_styling(position = "center", font_size = 8) 
save_kable(out_tab, file = "Tables/Table 2.tex", escape=F, format="latex", escape=F) 


################################################################################
### REGRESSION FUNCTION ########################################################
################################################################################

icw_positive <- unique(map$index[map$direction_pos==1 & map$index!=""])
icw_negative <- unique(map$index[map$direction_neg==1 & map$index!=""])

gen.reg <- function(dv, version, survey, lag, ctrl, het){
  print(dv)
  if(version==0){d$t <- as_factor(d$t0)}
  if(version==0.2){d$t <- as_factor(d$t0_2)}
  if(version==5){d$t <- as_factor(d$t5)}

  if(survey=="Baseline"){ # just used for attrition
   x <- d
   dv_lag_set <- ""; dv_lag <- ""
   lab <- "Attrition"
  }
  if(survey=="Endline"){
    x <- d %>% filter(in_endline==1)
    map <- map[!is.na(map$endline_var),]
    dv_lag <- map$baseline_var[map$endline_var==dv]
    lab <- map$lab[map$endline_var==dv]
  }
  x <- x %>% filter(!is.na(t))
  
  if(lag==0){dv_lag_name <- ""}
  if(lag==1){ ## control for BL values
    dv_lag_name <- dv_lag
    dv_lag[dv_lag!=""] <- paste("+",dv_lag[!is.na(dv_lag)])
    dv_lag_set <- dv_lag_name
  }
  if(lag==2){ ## used for het effects
    dv_lag_name <- dv_lag
    dv_lag <- paste("*",het,"+",het,"*(factor(block_id))")
    dv_lag_set <- het
  }
  
  if(ctrl==0){add_ctrl <- ""}
  if(ctrl==1){  # lasso variable selection
    x$y <- as.data.frame(x)[,colnames(x)==dv]
    bl_vars <- na.omit(map$baseline_var[!is.na(map$index) & map$baseline_var!=""])
    mat_full <- dplyr::select(x, any_of(bl_vars), t, block_id, y) 
    mat_full$y <- as.numeric(mat_full$y)
    cols <- paste(colnames(mat_full)[1:(length(colnames(mat_full))-3)], collapse="+") 
    
    fmla_lasso <- as.formula(paste("~",cols,"+ block_id + t")) 
    mat <- as.data.frame(model.matrix(fmla_lasso, as.data.frame(mat_full)))
    mat <- apply(mat, 2, scale)
    
    low_var <- nearZeroVar(mat) ## remove zero-variance and low-variance predictors
    if(length(low_var)>0){mat <- mat[,-low_var]}
    if(length(low_var)==0){mat <- mat}
    
    corr <- cova(as.matrix(mat)) # correlation matrix for all predictors
    corr_high <- findCorrelation(corr, cutoff=.95) # identify and drop highly correlated predictors
    if(length(corr_high)>0){mat <- mat[,-corr_high]}
    if(length(corr_high)==0){mat <- mat}
    
    mat <- as.data.frame(mat) %>% purrr::discard(~all(is.na(.) | . ==""))
    
    keep_case <- complete.cases(mat_full)

    cv_lambda <- cv.glmnet(as.matrix(mat[keep_case,]),na.omit(x$y),alpha=1,nfolds=5)$lambda.min
    lasso <- glmnet(as.matrix(mat[keep_case,]),na.omit(x$y), alpha=1, lambda=cv_lambda)
    keep_vars <- rownames(lasso$beta)[as.vector(!(lasso$beta==0))]
    keep_vars <- keep_vars[substr(keep_vars,1,1)!="t" & grepl("block_id",keep_vars)==F]
    keep_vars <- keep_vars[!(keep_vars %in% dv_lag_set)]
    
    x <- bind_cols(select(x, any_of(dv), t, any_of(dv_lag_set)), block_id=x$block_id, select(as.data.frame(mat), any_of(keep_vars)))
    x <- x %>% purrr::discard(~all(is.na(.) | . ==""))
    add_ctrl <- paste("+",paste(paste("`",keep_vars[keep_vars %in% colnames(x)],"`",sep=""), collapse="+"))
    add_ctrl[length(keep_vars)==0] <- ""
  }
  
  if(lag==0){fmla <- as.formula(paste(dv, '~ t | block_id'))}
  if(lag>0){
    if(ctrl==0){fmla <- as.formula(paste(dv, '~ t', dv_lag, '| block_id'))}
    if(ctrl==1){fmla <- as.formula(paste(dv, '~ t', dv_lag, add_ctrl, '| block_id'))}
  }
  
  if(length(lab)==0){
    dv_lag_name <- NA
    lab <- NA
  }
  
  reg <- feols(fmla, data=x, vcov="hetero")
  
  sgn_positive <- as.numeric(dv %in% c(icw_positive, paste0("end_icw_",icw_positive)) )
  sgn_negative <- as.numeric(dv %in% c(icw_negative, paste0("end_icw_",icw_negative)) )
  
  if(version==5){ # tests for differences in treatment effects in the disaggregated specification
    t1 <- linearHypothesis(reg, "tShort=tText", white.adjust="hc2") # text vs short
    t2 <- linearHypothesis(reg, "tLong=tText", white.adjust="hc2") # text vs long
    t3 <- linearHypothesis(reg, "tEmpathetic=tText", white.adjust="hc2") # text vs empathetic
    t4 <- linearHypothesis(reg, "tEmpathetic=tShort", white.adjust="hc2") # short vs empathetic
    t5 <- linearHypothesis(reg, "tEmpathetic=tLong", white.adjust="hc2") # long vs empathetic
    t6 <- linearHypothesis(reg, "tLong=tShort", white.adjust="hc2") # short vs long
    
    if(sgn_positive==1 | sgn_negative==1){
      t1_sign <- sign( (reg$coefficients[names(reg$coefficients)=="tShort"]) - (reg$coefficients[names(reg$coefficients)=="tText"]))
      t1_p <- abs(sgn_positive-pt(t1_sign*sqrt(t1$Chisq[2]),t1$Res.Df[2]))
      t2_sign <- sign( (reg$coefficients[names(reg$coefficients)=="tLong"]) - (reg$coefficients[names(reg$coefficients)=="tText"]))
      t2_p <- abs(sgn_positive-pt(t2_sign*sqrt(t2$Chisq[2]),t2$Res.Df[2]))
      t3_sign <- sign( (reg$coefficients[names(reg$coefficients)=="tEmpathetic"]) - (reg$coefficients[names(reg$coefficients)=="tText"]))
      t3_p <- abs(sgn_positive-pt(t3_sign*sqrt(t3$Chisq[2]),t3$Res.Df[2]))
      t4_sign <- sign( (reg$coefficients[names(reg$coefficients)=="tEmpathetic"]) - (reg$coefficients[names(reg$coefficients)=="tShort"]))
      t4_p <- abs(sgn_positive-pt(t4_sign*sqrt(t4$Chisq[2]),t4$Res.Df[2]))
      t5_sign <- sign( (reg$coefficients[names(reg$coefficients)=="tEmpathetic"]) - (reg$coefficients[names(reg$coefficients)=="tLong"]))
      t5_p <- abs(sgn_positive-pt(t5_sign*sqrt(t5$Chisq[2]),t5$Res.Df[2]))
      
      t <- data.frame(t1_p[1], t2_p[1], t3_p[1], t4_p[1], t5_p[1], t6$`Pr(>Chisq)`[2])
      if(sgn_positive==1){colnames(t) <- c("p(Short>Text)","p(Long>Text)","p(Empathetic>Text)","p(Empathetic>Short)","p(Empathetic>Long)","p(Long=Short)")}
      if(sgn_negative==1){colnames(t) <- c("p(Short<Text)","p(Long<Text)","p(Empathetic<Text)","p(Empathetic<Short)","p(Empathetic<Long)","p(Long=Short)")}
    }
    if(sgn_negative==0 & sgn_positive==0){
      t <- data.frame(t1$`Pr(>Chisq)`[2], t2$`Pr(>Chisq)`[2], t3$`Pr(>Chisq)`[2], t4$`Pr(>Chisq)`[2], t5$`Pr(>Chisq)`[2], t6$`Pr(>Chisq)`[2])
      colnames(t) <- c("p(Short=Text)","p(Long=Text)","p(Empathetic=Text)","p(Empathetic=Short)","p(Empathetic=Long)","p(Long=Short)")
    }
    t <- t(t); colnames(t)[1] <- "p"
  }
  
  # additional outputs used in figure and table production
  out <- as.data.frame(coeftable(reg)) %>% mutate(treatment=rownames(.)) %>% relocate(treatment) %>% 
    rename(coef=2, se=3, tval=4, pval=5) %>% mutate(lab=first(lab), dv_lag_name=first(dv_lag_name), N=reg$nobs, survey=survey )
  out <- data.frame(dv, out)
  out$ttext <- substr(out$treatment,2,50)
  out <- out[grepl(paste(unique(x$t),collapse="|"), as.character(out$ttext)),] 
  
  mean_all <- as.data.frame(dplyr::select(x, starts_with(dv)))
  out$mean_all <- mean(na.omit(mean_all[,1]))
  
  mean_dv <- as.data.frame(dplyr::select(x %>% dplyr::filter(t=="Control"), starts_with(dv)))
  out$mean_dv <- mean(na.omit(mean_dv[,1]))
  out$sd_dv <- sd(na.omit(mean_dv[,1]))
  
  if(lag==2){out$het <- het}
  
  out$coef_txt <- as.character(round(out$coef,2))
  out$mean_dv <- as.character(round(out$mean_dv,2))
  out$sd_dv <- as.character(round(out$sd_dv,2))
  
  out$coef_txt[nchar(out$coef_txt)==1] <- paste(out$coef_txt[nchar(out$coef_txt)==1],".",sep="")
  out$coef_txt <- str_pad(out$coef_txt, 4, side="right", pad="0")
  
  out$mean_dv[nchar(out$mean_dv)==1] <- paste(out$mean_dv[nchar(out$mean_dv)==1],".",sep="")
  out$mean_dv <- str_pad(out$mean_dv, 4, side="right", pad="0")
  
  out$sd_dv[nchar(out$sd_dv)==1] <- paste(out$sd_dv[nchar(out$sd_dv)==1],".",sep="")
  out$sd_dv <- str_pad(out$sd_dv, 4, side="right", pad="0")
  
  out$lab_mean <- paste(out$lab, "\nControl mean: ", out$mean_dv,"\nControl SD: ", out$sd_dv, sep="")
  
  is_pos <- out$dv %in% c(map$endline_var[map$direction=="Positive"], icw_positive)
  is_neg <- out$dv %in% c(map$endline_var[map$direction=="Negative"], icw_negative)
  out$correct_direction <- 0
  out$correct_direction[is_pos==T] <- as.numeric(out$coef>0)
  out$correct_direction[is_neg==T] <- as.numeric(out$coef<0)
  out$pre_reg <- as.numeric(is_pos==1|is_neg==1)
  
  if(!(version==5)){out <- list(out, reg_object=reg)}
  if(version==5){out <- list(out, t, reg_object=reg)}
  return(out)
}

################################################################################
### PLOT FUNCTION ##############################################################
################################################################################

gen.plot <- function(dv){
  
  reg <- gen.reg(dv, version=5, survey="Endline", lag=1, ctrl=1)
  
  out_disagg <- reg[[1]] %>% mutate(type="Disaggregated")
  reg_object <- reg[["reg_object"]]
  names(reg_object) <- unique(out_disagg$lab_mean)
  
  # for pod_takeup, use the "Pooled podcast" treatment vector (0.2) #
  if(grepl("pod_takeup",dv)==T){out_pooled <- gen.reg(dv, version=0.2, survey="Endline", lag=1, ctrl=1)[[1]] %>% mutate(type="Pooled")}
  if(grepl("pod_takeup",dv)==F){out_pooled <- gen.reg(dv, version=0, survey="Endline", lag=1, ctrl=1)[[1]] %>% mutate(type="Pooled")}

  out <- bind_rows(out_pooled,out_disagg)

  out$dv <- fct_rev(out$dv) 
  out$lab <- paste(out$lab)
  out$lab_mean <- fct_rev(factor(out$lab_mean, levels=unique(out$lab_mean)))
  
  out$treatment <- substr(as.character(out$treatment),2,nchar(as.character(out$treatment)))
  out$treatment[out$treatment=="Placebo incentives"] <- "Placebo\nincentives"
  out$treatment[out$treatment=="Pooled treatment"] <- "Pooled\ntreatment"
  out$treatment[out$treatment=="Pooled podcast"] <- "Pooled\npodcast"
  out$treatment <- gsub("-","\n(",out$treatment)
  out$treatment <- factor(out$treatment, levels=unique(out$treatment))
  
  # account for one-tailed tests in the plots according to pre-registered hypotheses #
    is_pos <- out$dv %in% c(map$endline_var[map$direction=="Positive"], icw_positive)
    is_neg <- out$dv %in% c(map$endline_var[map$direction=="Negative"], icw_negative)
    out$correct_direction <- 0
    out$correct_direction[is_pos==T] <- as.numeric(out$coef>0)
    out$correct_direction[is_neg==T] <- as.numeric(out$coef<0)
    
    out$pre_reg <- as.numeric(is_pos==1|is_neg==1)
    
    p95 <- qnorm(0.975)
    p90 <- qnorm(0.95)
    p80 <- qnorm(0.90)
    
    out$lower_90[out$correct_direction==0] <- out$coef[out$correct_direction==0] - (p90*out$se[out$correct_direction==0])
    out$upper_90[out$correct_direction==0] <- out$coef[out$correct_direction==0] + (p90*out$se[out$correct_direction==0])
    out$lower_95[out$correct_direction==0] <- out$coef[out$correct_direction==0] - (p95*out$se[out$correct_direction==0])
    out$upper_95[out$correct_direction==0] <- out$coef[out$correct_direction==0] + (p95*out$se[out$correct_direction==0])
    
    out$lower_90[out$correct_direction==1] <- out$coef[out$correct_direction==1] - (p80*out$se[out$correct_direction==1])
    out$upper_90[out$correct_direction==1] <- out$coef[out$correct_direction==1] + (p80*out$se[out$correct_direction==1])
    out$lower_95[out$correct_direction==1] <- out$coef[out$correct_direction==1] - (p90*out$se[out$correct_direction==1])
    out$upper_95[out$correct_direction==1] <- out$coef[out$correct_direction==1] + (p90*out$se[out$correct_direction==1])
    
    min_ci <- min(round(out$coef-p95*out$se,2),0)
    max_ci <- round(max(out$coef+p95*out$se),2)+0.05
    out_txt_add <- NULL

  # text for the comparison tests to add to the bottom panels #
    out_txt <- dplyr::select(as.data.frame(reg[[2]]), 1)
    out_txt_p <- as.character(round(out_txt$p,2))
    out_txt_p[out_txt_p=="0"] <- "0.00"
    out_txt_p[out_txt_p=="1"] <- "1.00" 
    out_txt_p[nchar(out_txt_p)==3] <- str_pad(out_txt_p[nchar(out_txt_p)==3],4,"right","0")
    out_txt_add <- paste(rownames(out_txt), out_txt_p, sep=" = ")
    out_txt_add <- paste(out_txt_add, collapse="\n")
  
  # fixing the placement of the comparison tests #
    add_me_1 <- NULL
    add_me_2 <- theme(axis.title.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank())
    offset <- 0
    offset[!is.null(out_txt_add)] <- 0.175 
    if(first(grepl("icw_pod_takeup",out$dv)==T)){offset <- 0.45}
    add_me_3 <- annotate("text", x = 1, y = max_ci+offset, label = out_txt_add, hjust = 1, size=3)
    y_add <- 0
    y_add[!is.null(out_txt_add)] <- 0.2
    if(mean(grepl("pod_takeup",out$dv)>0)){y_add <- 0.5}
    dodge_w <- 1.1
  
  out_pooled <- out %>% filter(type == "Pooled")
  out_disagg <- out %>% filter(type == "Disaggregated")

  # Disaggregated panel #
  p1 <- ggplot(data=out_disagg, aes(x=lab,y=coef, group=fct_rev(treatment), colour=fct_rev(treatment), label=fct_rev(treatment)))+
    theme_bw()+geom_hline(aes(yintercept=0),lty=2,colour='grey30')+
    geom_point(position = position_dodge2(width=dodge_w))+
    scale_color_viridis(discrete=T, option="viridis", end=0.8, guide=guide_legend(reverse=T))+
    ylim(c(min_ci-0.02,max_ci+y_add))+
    geom_errorbar(aes(ymax=lower_95,ymin=upper_95), width=0.125, position = position_dodge(width=dodge_w))+
    geom_errorbar(aes(ymax=lower_90,ymin=upper_90), width=0.25, position = position_dodge(width=dodge_w)) + 
    coord_flip()+ylab(NULL)+xlab(NULL)+
    add_me_1 + add_me_2 + add_me_3 +
    theme(legend.title = element_blank()) + geom_label(position = position_dodge2(width=1.1), size=2.75, label.size=0.5)+
    theme(legend.title = element_blank(), legend.position="none", 
          panel.grid.major.y = element_blank())

  # Pooled panel #
  p2 <- ggplot(data=out_pooled, aes(x=lab,y=coef, group=fct_rev(treatment), colour=fct_rev(treatment), label=fct_rev(treatment)))+ 
      theme_bw()+geom_hline(aes(yintercept=0),lty=2,colour='grey30')+
      scale_color_viridis(discrete=T, option="viridis", end=0.8, guide=guide_legend(reverse=T))+
      scale_fill_viridis(discrete=T, option="viridis", end=0.8, guide=guide_legend(reverse=T))+
      geom_point(position = position_dodge2(width=dodge_w))+ 
      ylim(c(min_ci-0.02,max_ci+y_add))+ 
      geom_errorbar(aes(ymax=lower_95,ymin=upper_95), width=0.125/1.5, position = position_dodge(width=dodge_w))+ 
      geom_errorbar(aes(ymax=lower_90,ymin=upper_90), width=0.25/1.75, position = position_dodge(width=dodge_w))+ 
      coord_flip()+ylab(NULL)+xlab(NULL)+
      add_me_2 + 
      theme(legend.title = element_blank(),legend.position="none",panel.grid.major.y = element_blank(),
            axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
      geom_label(position = position_dodge2(width=1.1), size=2.75, label.size=0.5)
  
  p2 <- p2 + theme(plot.margin = margin(margin(b=-1,t=1,l=1,r=1))) + facet_grid(rows=vars(type), switch="y") + theme(strip.background =element_rect(fill="white"))
  p1 <- p1 + theme(plot.margin = margin(margin(t=0,b=1,l=1,r=1))) + facet_grid(rows=vars(type), switch="y") + theme(strip.background =element_rect(fill="white"))

  return(ggarrange(p2, p1, heights=c(1.75,3.5), padding=0))
}

w <- 5.5
h <- 2.75

# Figures in Manuscript #

ggsave("Figures/Figure 4a.pdf", plot=gen.plot("end_icw_pod_takeup"), width=w, height=h)
ggsave("Figures/Figure 4b.pdf", plot=gen.plot("end_icw_treat_knowledge"), width=w, height=h)
ggsave("Figures/Figure 4c.pdf", plot=gen.plot("end_icw_future_takeup"), width=w, height=h)
ggsave("Figures/Figure 5a.pdf", plot=gen.plot("end_icw_discernment"), width=w, height=h)
ggsave("Figures/Figure 5b.pdf", plot=gen.plot("end_icw_conspiracy"), width=w, height=h)
ggsave("Figures/Figure 6a.pdf", plot=gen.plot("end_icw_verify_know"), width=w, height=h)
ggsave("Figures/Figure 6b.pdf", plot=gen.plot("end_icw_attention"), width=w, height=h)
ggsave("Figures/Figure 6c.pdf", plot=gen.plot("end_icw_trust_sm"), width=w, height=h)
ggsave("Figures/Figure 7a.pdf", plot=gen.plot("end_icw_consume_sm"), width=w, height=h)
ggsave("Figures/Figure 7b.pdf", plot=gen.plot("end_icw_verifies"), width=w, height=h)
ggsave("Figures/Figure 7c.pdf", plot=gen.plot("end_icw_share"), width=w, height=h)
ggsave("Figures/Figure 8a.pdf", plot=gen.plot("end_icw_covid"), width=w, height=h)
ggsave("Figures/Figure 8b.pdf", plot=gen.plot("end_icw_govt"), width=w, height=h)

# Figures in Appendix #

ggsave("Figures/Figure D1a.pdf", plot=gen.plot("end_icw_discernment_t"), width=w, height=h)
ggsave("Figures/Figure D1b.pdf", plot=gen.plot("end_icw_discernment_f"), width=w, height=h)
ggsave("Figures/Figure D2.pdf", plot=gen.plot("end_icw_challenge"), width=w, height=h)
ggsave("Figures/Figure D3a.pdf", plot=gen.plot("end_icw_verify_sources_which_WCW"), width=w, height=h)
ggsave("Figures/Figure D3b.pdf", plot=gen.plot("end_icw_verify_sources_which_FC"), width=w, height=h)
ggsave("Figures/Figure D3c.pdf", plot=gen.plot("end_icw_verify_sources_which_On"), width=w, height=h)
ggsave("Figures/Figure D3d.pdf", plot=gen.plot("end_icw_verify_sources_which_Tr"), width=w, height=h)
ggsave("Figures/Figure E1.pdf", plot=gen.plot("end_icw_fake_news"), width=w, height=h)
ggsave("Figures/Figure E2.pdf", plot=gen.plot("end_icw_alert_fakenews"), width=w, height=h)
ggsave("Figures/Figure E3a.pdf", plot=gen.plot("end_icw_trust_sm_true"), width=w, height=h)
ggsave("Figures/Figure E3b.pdf", plot=gen.plot("end_icw_trust_sm_content"), width=w, height=h)
ggsave("Figures/Figure E4a.pdf", plot=gen.plot("end_icw_trust_trad_media"), width=w, height=h)
ggsave("Figures/Figure E4b.pdf", plot=gen.plot("end_icw_trust_close"), width=w, height=h)
ggsave("Figures/Figure E5a.pdf", plot=gen.plot("end_icw_consume_trad_media"), width=w, height=h)
ggsave("Figures/Figure E5b.pdf", plot=gen.plot("end_icw_consume_close"), width=w, height=h)
ggsave("Figures/Figure E6.pdf", plot=gen.plot("end_icw_capacity"), width=w, height=h)
ggsave("Figures/Figure E7.pdf", plot=gen.plot("end_icw_populist"), width=w, height=h)
ggsave("Figures/Figure E8.pdf", plot=gen.plot("end_fc_requests_any"), width=w, height=h)

################################################################################
# TABLE PRODUCTION #############################################################
################################################################################

### FUNCTIONS ##################################################################

order.models <- function(input){ # used to order models in the table production code
  input <- input[order(names(input))]
  input_ex <- input[!grepl("ICW", names(input))]
  input_ex <- input_ex[order(names(input_ex))]
  input_icw <- input[grepl("ICW", names(input))]
  out <- c(input_icw, input_ex)
  return(out)  
}

rename.coefs <- function(old_names) { # used to relabel coefficients in the table production code
  new_names <- old_names
  new_names <- sapply(new_names, function(x) na.omit(map$lab[map$baseline_var==x])[1])
  new_names[is.na(new_names)] <- names(new_names[is.na(new_names)])    
  new_names <- str_replace(new_names, "^t", "")
  new_names[new_names=="Text"] <- "Text information"
  new_names[new_names=="Short"] <- "Short podcast"
  new_names[new_names=="Long"] <- "Long podcast"
  new_names[new_names=="Empathetic"] <- "Empathetic podcast"
  new_names <- paste("\\hspace{0.1cm}", new_names)
  setNames(new_names, old_names)
}

gen.col.index <- function(dim){ # used to produce column numbers in the table production code
  x <- 1:dim
  out <- c(" ", paste(" (",x,") ",sep=""))
  return(out)
}

adjust.p.val <- function(x,input_model,input_data){ # used to adjust p-values according to pre-registration in the table production code
  m <- input_model[[x]]
  correct <- input_data[[x]]$correct_direction
  correct <- c(correct, rep(0,length(m$tidy$term)-length(correct)))
  m$tidy$p.value[correct==1] <- m$tidy$p.value[correct==1]/2
  return(m)
}

adjust.p.val.bh <- function(x,input_model,l){ # used to adjust p-values for BH correction
  m <- input_model[[x]]
  if(x%%2!=0){ # odd-indexed (without ctrls)
    p_raw <- unlist(lapply(input_model[seq(1, length(input_model), by = 2)], function(x) x$tidy[2:l,]$p.value))
    p_adj <- p.adjust(p_raw, method = "BH")
    p_adj <- split(p_adj, gl(length(input_model)/2, l-1))
    names(p_adj) <- paste0("v",seq(1, length(input_model), by = 2))
  }
  if(x%%2==0){ # even-indexed (with ctrls)
    p_raw <- unlist(lapply(input_model[seq(2, length(input_model), by = 2)], function(x) x$tidy[2:l,]$p.value))
    p_adj <- p.adjust(p_raw, method = "BH")
    p_adj <- split(p_adj, gl(length(input_model)/2, l-1))
    names(p_adj) <- paste0("v",seq(2, length(input_model), by = 2))
  }
  m$tidy$p.value[2:l] <- p_adj[[paste0("v",as.character(x))]]
  return(m)
}

### Balance table (Table C2) ###################################################

  dem_bl <- unique(map$index[grepl("balance_",map$index)==T])
  dem_bl <- map$baseline_var[map$index%in%dem_bl & map$baseline_var!=""]

  icw_bl <- d %>% select(starts_with("icw_")) %>% colnames()
  icw_bl <- icw_bl[!(icw_bl%in%c("icw_verify_know_1","icw_verify_know_2"))]
  
  bal_vars <- c(dem_bl,icw_bl)
  
  # Pooled estimation
  models_t0 <- lapply(bal_vars, function(x) gen.reg(x,version=0,survey="Endline",lag=0,ctrl=0)[["reg_object"]])
  f_t0 <- bind_rows(lapply(models_t0, function(x) wald(x, "Placebo|Pooled", vcov="hetero")))
  
  # Disaggregated estimation
  models_t5 <- lapply(bal_vars, function(x) gen.reg(x,version=5,survey="Endline",lag=0,ctrl=0)[["reg_object"]])
  f_t5 <- bind_rows(lapply(models_t5, function(x) wald(x, "Placebo|Text|Short|Long|Empathetic", vcov="hetero")))
  
  f <- bind_cols(p0=f_t0$p, p5=f_t5$p)
  
  names <- data.frame(baseline_var=bal_vars)
  names <- left_join(names, select(map, baseline_var, lab), by="baseline_var")
  
  f$varname <- names$lab
  f <- f %>% relocate(varname)
  
  f$p0 <- paste("[",str_pad(as.character(round(f$p0,3)),"0",side="right",width=5),"]",sep="")
  f$p5 <- paste("[",str_pad(as.character(round(f$p5,3)),"0",side="right",width=5),"]",sep="")

  f$varname <- gsub("^", "\\\\hspace{0.1cm}", f$varname)
  colnames(f) <- c("Variable", "$p(\\bm{\\tau}_{pooled}=0)$", "$p(\\bm{\\tau}_{disagg.}=0)$")
  
  rows <- as_tibble(bind_rows(data.frame(var1="\\textit{A. Socio-demographic}",var2="",var3=""), 
                              data.frame(var1="\\textit{B. Baseline survey responses}",var2="",var3="")))
  attr(rows, 'position') <- c(1, length(dem_bl)+2)
  
  out_tab <- datasummary_df(f, col.names=colnames(f), format="latex", escape=F, align="lcc", add_rows=rows, title="Balance on pre-treatment outcomes\\label{Table_C2}") %>% 
    add_footnote("\\Balance", threeparttable = T, escape=F, notation="none") %>% kable_styling(position = "center", font_size = 10)
  
  save_kable(out_tab, file = "Tables/Table C2.tex", escape=F, format="latex", escape=F) 

### MAIN ANALYSIS TABLES #######################################################

make.results.table <- function(vars,survey,summary,title,ref){

  if(summary==0){ outvars <- vars }
  if(summary==1){
    outvars <- as.character(map$endline_var[map$index==gsub("end_icw_","",vars)])
    outvars <- c(vars, outvars)
    outvars <- outvars[outvars!=""]
  }
    
  # Pooled results (top panel) #################################################
  
  if(mean(grepl("pod_takeup",outvars)==T)==0){pooled_version<-0}
  if(mean(grepl("pod_takeup",outvars)==T)>0){pooled_version<-0.2}
  
  pooled1 <- lapply(outvars, function(x) gen.reg(x, pooled_version, survey, lag=1, ctrl=1))
  names <- lapply(pooled1, function(x) unique(x[[1]]$lab_mean))
  pooled1_reg <- lapply(pooled1, function(x) x[[2]])
  names(pooled1_reg) <- names
  pooled1_data <- lapply(pooled1, function(x) x[[1]])
  names(pooled1_data) <- names
  
  if("in_endline" %in% vars){pooled0 <- lapply(outvars, function(x) gen.reg(x, pooled_version, survey, lag=0, ctrl=0))}
  if(!("in_endline" %in% vars)){pooled0 <- lapply(outvars, function(x) gen.reg(x, pooled_version, survey, lag=1, ctrl=0))}

  pooled0_reg <- lapply(pooled0, function(x) x[[2]])
  names(pooled0_reg) <- names
  pooled0_data <- lapply(pooled0, function(x) x[[1]])  
  names(pooled0_data) <- names
  
  models0 <- c(pooled0_reg, pooled1_reg)
  models0 <- order.models(models0)
  
  # Pulling out relevant information for the bottom of the panels #
  data0 <- c(lapply(pooled0_data, function(x) x%>% select(correct_direction)),  
             lapply(pooled1_data, function(x) x%>% select(correct_direction)))  
  data0 <- order.models(data0)

  # adjust p-values conditional on pre-registration
  models0 <- modelsummary(models0, output = "modelsummary_list") 
  models0 <- lapply(1:length(models0), function(x) adjust.p.val(x,models0,data0))

  # adjust for multiple testing (B)
  if(mean(grepl("^fig",vars)==T)==1){ models0 <- lapply(1:length(models0), function(x) adjust.p.val.bh(x,models0,2)) }
  
  out_tab_pooled <- modelsummary(models0, escape=F, coef_rename = rename.coefs, 
                                 coef_omit = "^(?!tP|tT|tS|tL|tE)", gof_omit=".*",
                                 output="data.frame",estimate="{estimate}",statistic=c("({std.error})","[{p.value}]")) %>% select(2, 4:(length(models0)+3))
  
  colnames(out_tab_pooled) <- c("name",paste("var",1:length(models0),sep=""))
  out_tab_pooled$name[((1:length(out_tab_pooled$name))-1) %% 3 != 0] <- ""
  
  # Disaggregated results (bottom panel) #######################################
  
  disagg1 <- lapply(outvars, function(x) gen.reg(x, version=5, survey, lag=1, ctrl=1))
  names <- lapply(disagg1, function(x) unique(x[[1]]$lab_mean))
  disagg1_reg <- lapply(disagg1, function(x) x[[3]])
  names(disagg1_reg) <- names
  disagg1_data <- lapply(disagg1, function(x) x[[1]])
  names(disagg1_data) <- names
  
  if("in_endline" %in% vars){disagg0 <- lapply(outvars, function(x) gen.reg(x, version=5, survey, lag=0, ctrl=0))}
  if(!("in_endline" %in% vars)){disagg0 <- lapply(outvars, function(x) gen.reg(x, version=5, survey, lag=1, ctrl=0))}
  
  disagg0_reg <- lapply(disagg0, function(x) x[[3]])
  names(disagg0_reg) <- names
  disagg0_data <- lapply(disagg0, function(x) x[[1]])  
  names(disagg0_data) <- names
  
  models <- c(disagg0_reg, disagg1_reg)
  models <- order.models(models)
  
  # Pulling out relevant information for the bottom of the panels #
  data0 <- c(lapply(disagg0_data, function(x) x %>% select(lab_mean,correct_direction, pre_reg)), 
             lapply(disagg1_data, function(x) x %>% select(lab_mean,correct_direction, pre_reg)))
  data0 <- order.models(data0)
  
  m_all <- sapply(str_split(names(models),pattern="mean: |\nControl SD"),function(x)x[2])
  sd_all <- sapply(str_split(names(models),pattern="SD: "),last)
  
  vec_mean <- setNames(m_all, paste("var",1:length(models),sep=""))
  vec_sd <- setNames(sd_all, paste("var",1:length(models),sep=""))
  
  has_ctrl <- sapply(1:length(models), function(x) length(names(models[[x]]$coefficients))>(6))
  
  length_of_treatment <- sapply(1:length(models), function(x) sum(startsWith(names(models[[x]]$coefficients),"t")))
  length_of_treatment <- min(length_of_treatment)
  
  vec_control <- NA
  vec_control[has_ctrl==T] <- "$\\checkmark$"
  vec_control[has_ctrl==F] <- "$\\times$"
  vec_control <- setNames(vec_control, paste("var",1:length(models),sep=""))
  
  vec_prereg <- c(rep(min(data0[[1]]$pre_reg),length(models)))
  vec_prereg[vec_prereg == 1] <- "$\\checkmark$"
  vec_prereg[vec_prereg == 0] <- "$\\times$"
  vec_prereg <- setNames(vec_prereg, paste("var",1:length(models),sep=""))
  
  header_A <- tibble(name="\\textit{A. Pooled estimation}")
  header_B <- tibble(name="\\textit{B. Disaggregated estimation}")
  
  rows <- as_tibble(t(vec_control)) %>% bind_rows(as_tibble(t(vec_prereg))) %>% bind_rows(as_tibble(t(vec_mean))) %>% bind_rows(as_tibble(t(vec_sd)))  %>% mutate(name = as.character(1:4)) %>% 
    relocate(name) %>% mutate(name = case_when(name == "1" ~ "\\midrule Controls", name == "2" ~ "Directional hypothesis", name == "3" ~ "Control Mean", name == "4" ~ "Control SD"))
  rows <- bind_rows(header_A, out_tab_pooled, header_B, rows) 
  rows[is.na(rows)] <- ""
  
  attr(rows, 'position') <- c(1,1+(1:dim(out_tab_pooled)[1]), 2+dim(out_tab_pooled)[1],2+dim(out_tab_pooled)[1]+c(((3*length_of_treatment)+1):((3*length_of_treatment)+4)))
  names(models) <- gsub("\\n.*", "", names(models))
  
  # splitting column titles across lines #
  var_names <- c(" ",unique(names(models)))
  var_names.split <- str_split(var_names,pattern=" ")
  
  where.are.spaces <- str_locate_all(pattern=" ", var_names)
  where.to.split <- sapply(1:length(var_names), function(x) first(which(where.are.spaces[[x]][,1]>16)))
  table_title <- paste(title,"\\label{",ref,"}",sep="")
  
  for(i in 1:length(var_names)){
    if(length(where.to.split[[i]])==0){var_names.split[[i]] <- var_names.split[[i]]}
    else{var_names.split[[i]][where.to.split[[i]]] <- paste("\\\\\\\\",var_names.split[[i]][where.to.split[[i]]],sep="")}
  }
  
  var_names <- sapply(var_names.split, function(x) paste(x, collapse=" "))
  var_names <- paste("\\\\specialcell{",var_names,"}")
  var_names[1] <- " "
  var_names <- gsub(" [(]part 1[)]","",var_names)
  var_names <- gsub(" [(]part 2[)]","",var_names)
  var_names <- gsub("ICW: How verify (use sources)","How verify (use sources)",var_names)
  
  myHeader <- c(1, rep(2,length(unique(names(models)))))
  names(myHeader) <- var_names

  gm <- tibble::tribble(
    ~raw,~clean,~fmt,
    "r.squared","R$^2$", 2,
    "nobs","Observations", 0,
  )
  
  align_txt <- c("l",rep("c",length(models)))
  align_txt <- paste(align_txt,collapse="")
  
  width_col_1 <- "4.75cm"
  width_col_c <- "1.5cm"
  font_size_c <- 10
  
  # lower column width and font size conditional on model size
  if(length(models)>=12){width_col_c <- "0.8cm"}
  if(length(models)>=12){font_size_c <- 6}
  if(length(models)>=12){width_col_1 <- "2cm"}
  
  if(length(models)>=10 & length(models)<12){width_col_c <- "1cm"}
  if(length(models)>=10 & length(models)<12){font_size_c <- 6.5}
  if(length(models)>=10 & length(models)<12){width_col_1 <- "2.2cm"}
  
  if(length(models)>=8 & length(models)<10){width_col_c <- "1cm"}
  if(length(models)>=8 & length(models)<10){font_size_c <- 7.5}
  if(length(models)>=8 & length(models)<10){width_col_1 <- "2.4cm"}
  
  models <- modelsummary(models, output = "modelsummary_list") # redefine as a modelsummary_list
  models <- lapply(1:length(models), function(x) adjust.p.val(x, models, data0))

  # adjust for multiple testing (BH)
  if(mean(grepl("^fig",vars)==T)==1){ models0 <- lapply(1:length(models0), function(x) adjust.p.val.bh(x,models,5)) }
  
  out_tab <- modelsummary(models, col.names=gen.col.index(length(models)), escape=F, coef_omit = "^(?!tP|tT|tS|tL|tE)", 
                          align=align_txt,estimate="{estimate}",statistic=c("({std.error})","[{p.value}]"),
                          coef_rename = rename.coefs, gof_map = gm, add_rows = rows, format="latex",
                          title=table_title, position="!htpb",modelsummary_format_numeric_latex = "plain") %>% 
    add_header_above(header = myHeader, escape=F)  %>%  
    column_spec(1, width = width_col_1) %>% column_spec(2:(length(models)+1),width=width_col_c) %>% 
    row_spec(1+dim(out_tab_pooled)[1], hline_after=T) %>% 
    kable_styling(position = "center", font_size = font_size_c) %>%  
    add_footnote("\\Starnote", threeparttable = T, escape=F, notation="none")
  out_tab <- gsub("&lt;0.001","0.000",out_tab)
  
  return(out_tab)
}

make.results.table("in_endline","Baseline",summary=0,title="Attrition",ref="Table_C1") %>% save_kable(file = "Tables/Table C1.tex", escape=F, format="latex")
make.results.table("end_icw_pod_takeup","Endline",summary=1,title="Podcast take-up",ref="Table_F1") %>% save_kable(file = "Tables/Table F1.tex", escape=F, format="latex")
make.results.table("end_icw_treat_knowledge","Endline",summary=1,title="Treatment knowledge",ref="Table_F2") %>% save_kable(file = "Tables/Table F2.tex", escape=F, format="latex")
make.results.table("end_icw_future_takeup","Endline",summary=1,title="Future take-up",ref="Table_F3") %>% save_kable(file = "Tables/Table F3.tex", escape=F, format="latex")
make.results.table("end_icw_discernment","Endline",summary=1,title="Discernment",ref="Table_F4") %>% save_kable(file = "Tables/Table F4.tex", escape=F, format="latex")
make.results.table("end_icw_conspiracy","Endline",summary=1,title="Skepticism of conspiracy theories",ref="Table_F5") %>% save_kable(file = "Tables/Table F5.tex", escape=F, format="latex")
make.results.table("end_icw_verify_know_1","Endline",summary=1,title="Knowledge of verification methods (part 1)",ref="Table_F6_1") %>% save_kable(file = "Tables/Table F6_1.tex", escape=F, format="latex")
make.results.table("end_icw_verify_know_2","Endline",summary=1,title="Knowledge of verification methods (part 2)",ref="Table_F6_2") %>% save_kable(file = "Tables/Table F6_2.tex", escape=F, format="latex")
make.results.table("end_icw_attention","Endline",summary=1,title="Attention to veracity of social media content",ref="Table_F7") %>% save_kable(file = "Tables/Table F7.tex", escape=F, format="latex")
make.results.table("end_icw_trust_sm","Endline",summary=1,title="Trust in social media (besides WhatsApp)",ref="Table_F8") %>% save_kable(file = "Tables/Table F8.tex", escape=F, format="latex")
make.results.table("end_icw_consume_sm","Endline",summary=1,title="Social media consumption",ref="Table_F9") %>% save_kable(file = "Tables/Table F9.tex", escape=F, format="latex")
make.results.table("end_icw_verifies","Endline",summary=1,title="Active verification",ref="Table_F10") %>% save_kable(file = "Tables/Table F10.tex", escape=F, format="latex")
make.results.table("end_icw_share","Endline",summary=1,title="Sharing",ref="Table_F11") %>% save_kable(file = "Tables/Table F11.tex", escape=F, format="latex")
make.results.table("end_icw_covid","Endline",summary=1,title="COVID-19 beliefs and preventative behavior",ref="Table_F12") %>% save_kable(file = "Tables/Table F12.tex", escape=F, format="latex")
make.results.table("end_icw_govt","Endline",summary=1,title="Government attitudes",ref="Table_F13") %>% save_kable(file = "Tables/Table F13.tex", escape=F, format="latex")

### Table with ICW indexes and including controls (Tables F14-F15) #############
################################################################################

fig_4 <- c("end_icw_pod_takeup","end_icw_treat_knowledge","end_icw_future_takeup")
fig_5 <- c("end_icw_discernment","end_icw_conspiracy")
fig_6 <- c("end_icw_verify_know","end_icw_attention","end_icw_trust_sm") 
fig_7 <- c("end_icw_consume_sm","end_icw_verifies","end_icw_share") 
fig_8 <- c("end_icw_covid","end_icw_govt")

vars <- c(fig_4, fig_5, fig_6, fig_7, fig_8)

make.ctrl.table <- function(vars,version,title,ref){
  
  all <- lapply(vars, function(x) gen.reg(x,version,survey="Endline",lag=1,ctrl=1))
  
  models <- lapply(all, function(x) x[["reg_object"]])
  details <- unlist(lapply(all, function(x) x[[1]]$lab_mean[1]))
  
  names(models) <- details
  m_all <- sapply(str_split(names(models),pattern="mean: |\nControl SD"),function(x)x[2])
  sd_all <- sapply(str_split(names(models),pattern="SD: "),last)
  
  vec_mean <- setNames(m_all, paste("var",1:length(models),sep=""))
  vec_sd <- setNames(sd_all, paste("var",1:length(models),sep=""))
  
  # always has controls #
  has_ctrl <- rep(1, length(models))
  
  length_of_treatment <- sapply(1:length(models), function(x) sum(startsWith(names(models[[x]]$coefficients),"t")))
  length_of_treatment <- min(length_of_treatment)
  
  vec_control <- NA
  vec_control[has_ctrl==T] <- "$\\checkmark$"
  vec_control[has_ctrl==F] <- "$\\times$"
  vec_control <- setNames(vec_control, paste("var",1:length(models),sep=""))
  
  rows <- as_tibble(t(vec_control)) %>% bind_rows(as_tibble(t(vec_mean))) %>% bind_rows(as_tibble(t(vec_sd)))  %>% mutate(name = as.character(1:3)) %>% 
    relocate(name) %>% mutate(name = case_when(name == "1" ~ "\\midrule Controls", name == "2" ~ "Control Mean", name == "3" ~ "Control SD"))
  rows[is.na(rows)] <- ""
  
  # put the rows chunk below the longest column #
  n_coefs <- length(unique(unlist(lapply(models, function(x) names(x$coefficients)))))
  attr(rows, 'position') <- ((n_coefs)+1):((n_coefs)+3)
  names(models) <- gsub("\\n.*", "", names(models))
  
  var_names <- c(" ",unique(names(models)))
  var_names.split <- str_split(var_names,pattern=" ")
  
  where.are.spaces <- str_locate_all(pattern=" ", var_names)
  where.to.split <- sapply(1:length(var_names), function(x) first(which(where.are.spaces[[x]][,1]>16))) # first colname
  
  table_title <- paste(title,"\\label{",ref,"}",sep="")
  names(models) <- paste("(",seq(1:length(models)),")",sep="") # just numerical
  
  gm <- tibble::tribble(
    ~raw,~clean,~fmt,
    "r.squared","R$^2$", 2,
    "nobs","Observations", 0,
  )
  
  align_txt <- c("l",rep("c",length(models)))
  align_txt <- paste(align_txt,collapse="")
  
  width_col_1 <- "3.5cm"
  width_col_c <- "1cm"
  font_size_c <- 5
  
  ref_figs <- data.frame(vars) %>% left_join(map %>% select(endline_var,fig), by=c("vars"="endline_var"))
  
  head_txt <- paste("\\\\ref{",ref_figs$fig,"}",sep="")
  head_txt <- c("Figure", head_txt)
  head <- c(1, rep(1,length(models)))
  names(head) <- head_txt
  
  out_tab <- modelsummary(models, col.names=c("",names(models)), escape=F, 
                          align=align_txt, fmt=2,estimate="{estimate} ({std.error})",
                          statistic=NULL, coef_rename = rename.coefs, 
                          gof_map = gm, add_rows = rows, format = "latex",
                          title=table_title, position="!htpb",
                          modelsummary_format_numeric_latex = "plain") %>% 
    add_header_above(header = head, escape=F)  %>%  
    column_spec(1, width = width_col_1) %>% column_spec(2:(length(models)+1),width=width_col_c) %>% 
    kable_styling(position = "center", font_size = font_size_c) %>% 
    add_footnote("\\StarnoteCtrl", threeparttable = T, escape=F, notation="none")
  out_tab <- gsub("&lt;0.001","0.000",out_tab)
  
  return(out_tab)
}

make.ctrl.table(vars, version=0, title="Pooled estimation ICW outcomes (including LASSO-selected covariate coefficients)","Table_F14") %>% save_kable(file = "Tables/Table F14.tex", escape=F, format="latex")
make.ctrl.table(vars, version=5, title="Disaggregated estimation ICW outcomes (including LASSO-selected covariate coefficients)","Table_F15") %>% save_kable(file = "Tables/Table F15.tex", escape=F, format="latex")

### Heterogeneous effects (Figure D4) ##########################################
################################################################################

het_vars <- c("dem_q3_2","dem_q3_3","icw_verify_know") 

gen.het.df <- function(dv, version, het_vars, ctr){
  print(dv)
  
  x <- bind_rows(lapply(het_vars, function(he) gen.reg(dv, version, "Endline", lag=2, ctrl=1, het=he)[[1]]))
  x$t_term <- sapply(str_split(x$ttext, ":"),first)
  x$ttext <- str_replace(x$ttext, x$het, "HE")
  x$ttext <- str_replace(x$ttext,  ":", " x ")
  x$ttext <- factor(x$ttext, levels=sort(unique(x$ttext)))
  
  x$is_het <- as.numeric(grepl(" x ", x$ttext))
  
  x$t_term_type <- NA
  x$t_term_type[!grepl(" x ", x$ttext)] <- "T"
  x$t_term_type[grepl(" x ", x$ttext)] <- "T x HE"
  x$t_term_type <- factor(x$t_term_type, levels=unique(x$t_term_type))
  
  x$het[x$het=="dem_q3_2"] <- "Secondary\neducation"
  x$het[x$het=="dem_q3_3"] <- "Tertiary\neducation"
  x$het[x$het=="icw_verify_know"] <- "Verification\nknowledge"
  
  return(x)
}

out.4 <- bind_rows(lapply(fig_4, function(x) gen.het.df(x,version=0,het_vars))) %>% mutate(fig="Figure 4")
out.5 <- bind_rows(lapply(fig_5, function(x) gen.het.df(x,version=0,het_vars))) %>% mutate(fig="Figure 5")
out.6 <- bind_rows(lapply(fig_6, function(x) gen.het.df(x,version=0,het_vars))) %>% mutate(fig="Figure 6")
out.7 <- bind_rows(lapply(fig_7, function(x) gen.het.df(x,version=0,het_vars))) %>% mutate(fig="Figure 7")
out.8 <- bind_rows(lapply(fig_8, function(x) gen.het.df(x,version=0,het_vars))) %>% mutate(fig="Figure 8")

out.4$label[out.4$dv=="end_icw_pod_takeup"] <- "Podcast take-up"
out.4$label[out.4$dv=="end_icw_treat_knowledge"] <- "Treatment knowledge"
out.4$label[out.4$dv=="end_icw_future_takeup"] <- "Future take-up"

out.5$label[out.5$dv=="end_icw_discernment"] <- "Discernment between\nfake and true"
out.5$label[out.5$dv=="end_icw_conspiracy"] <- "Identification of\nconspiracy theories"

out.6$label[out.6$dv=="end_icw_verify_know"] <- "Knowledge of\nverification methods"
out.6$label[out.6$dv=="end_icw_attention"] <- "Attention to veracity"
out.6$label[out.6$dv=="end_icw_trust_sm"] <- "Trust in social media"

out.7$label[out.7$dv=="end_icw_consume_sm"] <- "Social media\nconsumption"
out.7$label[out.7$dv=="end_icw_verifies"] <- "Active verification"
out.7$label[out.7$dv=="end_icw_share"] <- "Sharing"

out.8$label[out.8$dv=="end_icw_covid"] <- "COVID-19 beliefs and\npreventative behavior"
out.8$label[out.8$dv=="end_icw_govt"] <- "Views and attitudes\nabout government"

make.het.plot <- function(input){
  out <- input %>% filter(t_term=="Pooled treatment")
  out$dv <- factor(out$dv, levels=unique(out$dv))
  out$label <- factor(out$label, levels=unique(out$label))
  out$t_lab[out$t_term_type=="T"] <- "Pooled treatment"
  out$t_lab[out$t_term_type=="T x HE"] <- "Pooled treatment x [Variable]"

  p <- ggplot(data=out, aes( x=fct_rev(label), y=coef, group=fct_rev(t_lab),colour=fct_rev(t_lab)) )+
    theme_bw()+geom_hline(aes(yintercept=0),lty=2,colour='grey30')+
    geom_point(position = position_dodge2(width=0.5))+
    geom_errorbar(aes(ymax=coef-(1.96*se),ymin=coef+(1.96*se)), width=0.125, position = position_dodge(width=0.5))+
    geom_errorbar(aes(ymax=coef-(1.645*se),ymin=coef+(1.645*se)), width=0.25, position = position_dodge(width=0.5))+
    scale_color_viridis(discrete=T, option="viridis", end=0.8, guide=guide_legend(reverse=T))+
    facet_grid(cols=vars(het),rows=vars(fig))+
    theme(legend.position = "top", legend.title = element_blank())+
    theme(strip.background =element_rect(fill="white"))+
    coord_flip()+ylab(NULL)+xlab(NULL)
  return(p)
}

ggsave("Figures/Figure D4.pdf",
       plot=ggarrange(make.het.plot(out.4),
                      make.het.plot(out.5)+theme(strip.text.x = element_blank(),legend.position="none"),
                      make.het.plot(out.6)+theme(strip.text.x = element_blank(),legend.position="none"),
                      make.het.plot(out.7)+theme(strip.text.x = element_blank(),legend.position="none"),
                      make.het.plot(out.8)+theme(strip.text.x = element_blank(),legend.position="none")+ylab("Coefficient estimate"),
                      ncol=1),
       width=8, height=8)

### Aggregating outcomes (Table F16) ###########################################
################################################################################

icw_positive <- c(icw_positive, map$endline_var[grepl("^fig",map$endline_var)])

d <- d %>% mutate(end_icw_trust_sm = -end_icw_trust_sm, 
                  end_icw_consume_sm = -end_icw_consume_sm,
                  end_icw_share = -end_icw_share) %>% # reverse vars to ensure ICW indexes go same way for aggregation
  mutate(fig4 = rowSums(across(all_of(fig_4))),
         fig5 = rowSums(across(all_of(fig_5))),
         fig6 = rowSums(across(all_of(fig_6))),
         fig7 = rowSums(across(all_of(fig_7))),
         fig8 = rowSums(across(all_of(fig_8)))) %>% 
  mutate( across( starts_with("fig"), function(x) (x-mean(na.omit(x[t0==1]))) / sd(na.omit(x[t0==1])) ))

make.results.table(c("fig4","fig5","fig6","fig7","fig8"),"Endline",summary=0,title="Aggregating ICW indexes from each figure",ref="Table_F16") %>% save_kable(file = "Tables/Table F16.tex", escape=F, format="latex")
